/* b_figA1_psc_hist.do *************************************************************

	This code creates Figure A1, the histogram of propensity scores in the sample,
  shaded by offer rate

******************************************************************************/

* Load the analysis sample. The path is built from the $stata_data_analysis
* global, which must already be defined when this file runs.
use "$stata_data_analysis/final_sample.dta", clear

* Use the college outcomes sample - two-way bar graph with shading based on offer rates
preserve
	* Keep observations whose propensity score is neither 0 nor 1 and that are
	* flagged in_K1.
	* NOTE(review): a missing pformany also satisfies !inlist(pformany,0,1); such
	* rows would be counted in N below but fall in no bin - confirm none exist.
	keep if !inlist(pformany,0,1) & in_K1

	* Sample size, reported in the figure caption
	count
	local N = r(N)

	* generate 50 bins for p-scores (width .02); psc_bins holds each bin's left endpoint
	egen psc_bins = cut(pformany), at(0(.02)1)

	* Per-bin number of observations and per-bin share with soffany
	* NOTE(review): egen sum() treats missing soffany as 0 while _N counts every
	* row, so missing offers would pull the rate down - confirm soffany is complete.
	bysort psc_bins: gen total_per_psc = _N
	bysort psc_bins: egen sum_offers_psc = sum(soffany)
	* Stored as double so that the comparisons against .20/.40/.60/.80 below are
	* made at the same precision as the literals; a float here would make bins
	* whose rate sits exactly on a cut-point depend on float rounding.
	gen double offer_rate_psc = sum_offers_psc / total_per_psc

	* Collapse to one row per bin
	keep psc_bins total_per_psc offer_rate_psc
	duplicates drop

	* One bar series per offer-rate category so each gets its own fill intensity.
	* Categories are [0,.2) [.2,.4) [.4,.6) [.6,.8) [.8,1], matching the legend;
	* the upper bound of the fourth category is strict so that a bin with an
	* offer rate of exactly .8 belongs only to the last category.
	* NOTE(review): bars are drawn centred on psc_bins (the bin's left endpoint)
	* with barwidth(.02), i.e. shifted left by .01 relative to the bin - confirm
	* this is intended.
	graph twoway ///
		(bar total_per_psc psc_bins if offer_rate_psc >= 0 & offer_rate_psc < .20, ///
			fintensity(inten20) color(edkblue) barwidth(.02)) ///
		(bar total_per_psc psc_bins if offer_rate_psc >= .20 & offer_rate_psc < .40, ///
			fintensity(inten40) color(edkblue) barwidth(.02)) ///
		(bar total_per_psc psc_bins if offer_rate_psc >= .40 & offer_rate_psc < .60, ///
			fintensity(inten60) color(edkblue) barwidth(.02)) ///
		(bar total_per_psc psc_bins if offer_rate_psc >= .60 & offer_rate_psc < .80, ///
			fintensity(inten80) color(edkblue) barwidth(.02)) ///
		(bar total_per_psc psc_bins if offer_rate_psc >= .80 & offer_rate_psc <= 1, ///
			fintensity(inten100) color(edkblue) barwidth(.02)) ///
		, xtitle("Propensity scores") ytitle("Frequency") ///
		graphregion(fcolor(white)) scheme(s2color) ///
		caption("Note: This figure plots the distribution of non-degenerate propensity scores, with shading based on offer rates within bins." "The sample is the same as that used in Table 3 on college-going outcomes (Table N = `N').", size(vsmall)) ///
		legend(order(1 "[0, .2)" 2 "[0.2, 0.4)" 3 "[0.4, 0.6)" 4 "[0.6, 0.8)" 5 "[0.8, 1.0]") rows(2) subtitle("Offer rate:"))
restore

* The graph stays in memory after restore; write it to the $figures directory.
graph export "$figures/pscore_dist_offrate_shading_NSC.pdf", replace
